##
### GLMMs and GEEs with Epilepsy Data
##
##### Load Required Libraries #####
library(tidyverse) # ggplot2, tidyr
library(lme4) # Functions: glmer
library(gee) # Functions: gee
##
#### Read in data set into R:
##
epilepsy <- read.table("http://www.hsph.harvard.edu/fitzmaur/ala2e/epilepsy.dat",
header=FALSE)
names(epilepsy) <- c("ID","trt","age","Week0","Week2","Week4","Week6","Week8")
epilepsy$trt <- factor(epilepsy$trt, levels=c(0,1), labels=c("Placebo","Progabide"))
## Convert to long form:
epi_long <- pivot_longer(epilepsy,
cols = 4:8,
names_to = "Time", names_prefix = "Week",
values_to = "Count")
epi_long$Time <- as.numeric(epi_long$Time)
## Create new variables
epi_long <- epi_long %>% mutate(
PostBase = as.numeric(Time != 0),
Weeks = 8*(PostBase==0) + 2*(PostBase==1)
)
##### Exploratory Data Analysis #####
## Number of seizures by group by time period:
boxplot(Count ~ trt+Time, at=c(1,2,4,5,7,8,10,11,13,14), data=epi_long, col=c("red","blue"), ylab="Number of Seizures", xlab="", names=rep(c("Plac","Prog"),5), main="Number of Seizures by Group and Week")
mtext( c("Baseline",paste("Week",c(2,4,6,8))), side=1, at=c(1.5,4.5,7.5,10.5,13.5), line=3 )

# Calculate rates per week:
epi_long <- epi_long %>% mutate(
Rate = case_when(
Time == 0 ~ Count/8,
Time != 0 ~ Count/2
)
)
library(yarrr)
pirateplot(Count ~ trt+Time, data = epi_long, inf.method = "ci", inf.disp = "line")

pirateplot(Rate ~ trt+Time, data = epi_long, inf.method = "ci", inf.disp = "line")

pirateplot(log1p(Rate) ~ trt+Time, data = epi_long, inf.method = "ci", inf.disp = "line")

# Plot of mean rates:
means <- tapply(epi_long$Rate, list(epi_long$Time,epi_long$trt), mean)
matplot(matrix(c(0,2,4,6,8)), means,
col=c(1,1), lty=c(3,1), type="o",
pch=c(1,16), xlab="Time (weeks)",
ylab="Mean rate of seizures (per week)",
ylim=c(2.5,5.0),
main="Mean Rate of Seizures by Treatment Group")
legend(3.5, 3.0, c("Placebo","Progabide"), lty=c(3,1))

## Plots of individual counts:
matplot(matrix(c(0,2,4,6,8)), t(epilepsy[,4:8]),
col=(as.numeric(epilepsy$trt=="Placebo")+2), type="l", ylim = c(0,180),
xlab="Week",ylab="No. of Seizures ", main="Response Profiles by ID and Treatment")
legend(5,150,c("Placebo","Progabide"),col=c(2,3),lty=c(1,1))

library(GGally)
epilepsy %>% ggparcoord(4:8, scale = "globalminmax", group = "trt") +
theme_bw() +
facet_wrap(~trt)

# Who is the outlier at baseline in the Progabide group?
plot(Count[trt=="Progabide"] ~ Time[trt=="Progabide"],
xlab="Week", ylab="Seizures",
main="Counts of Seizures for Progabide Group", col="blue",
data=epi_long)
points(Count[trt=="Placebo"] ~ Time[trt=="Placebo"], pch=2,
col="red", data=epi_long)
identify(epi_long$Count ~ epi_long$Time, labels=epi_long$ID)

## integer(0)
# ID 49 data:
epi_long[epi_long$ID==49,]
## # A tibble: 5 × 8
## ID trt age Time Count PostBase Weeks Rate
## <int> <fct> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 49 Progabide 22 0 151 0 8 18.9
## 2 49 Progabide 22 2 102 1 2 51
## 3 49 Progabide 22 4 65 1 2 32.5
## 4 49 Progabide 22 6 72 1 2 36
## 5 49 Progabide 22 8 63 1 2 31.5
# This patient could have a large impact on the analysis -
# Book does analysis with and without ID 49.
##### Fitting GLMMs #####
# Since the number of weeks each count refers to differs
# (8 weeks for baseline by 2 weeks afterwards)
# we need to include an "offset" --> Model mean rate per week
# With glmer (and lmer) function, random effects specified in parentheses:
?lmer
?glmer
## Fit GLMM
epi_long <- epi_long %>% mutate(PostBase = factor(PostBase))
mod1 <- glmer(Count ~ trt*PostBase + (PostBase | ID), offset=log(Weeks),
family=poisson, data=epi_long)
summary(mod1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Count ~ trt * PostBase + (PostBase | ID)
## Data: epi_long
## Offset: log(Weeks)
##
## AIC BIC logLik deviance df.resid
## 1864.4 1890.2 -925.2 1850.4 288
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1394 -0.7073 -0.0620 0.5138 6.9653
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 0.4999 0.7070
## PostBase1 0.2319 0.4816 0.16
## Number of obs: 295, groups: ID, 59
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0708453 0.1402715 7.634 2.27e-14
## trtProgabide 0.0512167 0.1927136 0.266 0.7904
## PostBase1 -0.0004996 0.1091006 -0.005 0.9963
## trtProgabide:PostBase1 -0.3062158 0.1504205 -2.036 0.0418
##
## Correlation of Fixed Effects:
## (Intr) trtPrg PstBs1
## trtProgabid -0.725
## PostBase1 0.011 -0.013
## trtPrgb:PB1 -0.014 0.025 -0.709
plot(allEffects(mod1, residuals = T), type = "response", x.var = "PostBase") #Issues in scaling with offset in Poisson rate models

# Estimated random effects
ranef(mod1)
## $ID
## (Intercept) PostBase1
## 1 -0.60197367 0.053075955
## 2 -0.60197367 0.053075955
## 3 -0.95584893 0.112780207
## 4 -0.78751955 0.127425353
## 5 0.99154662 -0.116149178
## 6 0.10948363 -0.138443638
## 7 -0.57934911 -0.079468569
## 8 0.81464415 0.566916757
## 9 -0.01965844 -0.032916035
## 10 -0.46624570 0.706002860
## 11 0.78554644 0.243655494
## 12 0.31770399 -0.025038531
## 13 -0.25263814 -0.103699518
## 14 0.55723001 0.033499563
## 15 1.26048139 -0.300455313
## 16 0.64351429 -0.804224609
## 17 -0.38232600 -0.620312840
## 18 1.52980607 0.133760130
## 19 -0.26255897 -0.145316438
## 20 -0.17314632 -0.164548924
## 21 -0.54995950 0.015238657
## 22 -0.71194460 0.132566532
## 23 -0.32687377 -0.202650252
## 24 0.17754869 0.066827808
## 25 0.89477469 0.893345724
## 26 -0.88246585 -0.321623991
## 27 -0.72517425 -0.108837282
## 28 0.67746017 0.137684040
## 29 1.07892366 -0.202875873
## 30 0.41195329 0.028095350
## 31 -0.32260666 -0.412393844
## 32 -0.65611910 0.221362632
## 33 -0.18652492 0.186766964
## 34 -0.07899799 -0.305198729
## 35 0.35362987 0.996654891
## 36 -0.37841126 0.381263969
## 37 -0.65496454 0.021082513
## 38 0.92628753 -0.532610668
## 39 0.48171433 -0.001086379
## 40 -1.07529125 -0.273473810
## 41 -0.21504353 -0.556473937
## 42 -0.51869848 0.063582682
## 43 0.65368382 0.589958949
## 44 0.35793214 0.010241639
## 45 0.44457601 0.295731188
## 46 -0.94074736 0.042165802
## 47 0.37897839 0.173507511
## 48 -0.83298727 -0.495214546
## 49 1.81733174 0.986679568
## 50 -0.12992207 -0.153557314
## 51 0.49339327 -0.103671459
## 52 0.16410222 -0.536852792
## 53 0.82987828 0.497735321
## 54 -0.05966416 -0.204850411
## 55 -0.34384372 0.112038425
## 56 0.06474998 0.892379169
## 57 -0.10209409 -0.626688165
## 58 -0.81580028 -0.856342240
## 59 -0.60188743 -0.013537367
##
## with conditional variances for "ID"
library(lattice)
dotplot(ranef(mod1, postVar = T))
## $ID

# Subject-specific coefficients (beta_k + b_ki)
coef(mod1)
## $ID
## (Intercept) trtProgabide PostBase1 trtProgabide:PostBase1
## 1 0.468871652 0.0512167 0.052576363 -0.3062158
## 2 0.468871652 0.0512167 0.052576363 -0.3062158
## 3 0.114996393 0.0512167 0.112280615 -0.3062158
## 4 0.283325775 0.0512167 0.126925761 -0.3062158
## 5 2.062391945 0.0512167 -0.116648770 -0.3062158
## 6 1.180328951 0.0512167 -0.138943230 -0.3062158
## 7 0.491496211 0.0512167 -0.079968161 -0.3062158
## 8 1.885489473 0.0512167 0.566417165 -0.3062158
## 9 1.051186885 0.0512167 -0.033415627 -0.3062158
## 10 0.604599627 0.0512167 0.705503268 -0.3062158
## 11 1.856391768 0.0512167 0.243155901 -0.3062158
## 12 1.388549310 0.0512167 -0.025538124 -0.3062158
## 13 0.818207179 0.0512167 -0.104199110 -0.3062158
## 14 1.628075335 0.0512167 0.032999971 -0.3062158
## 15 2.331326719 0.0512167 -0.300954905 -0.3062158
## 16 1.714359615 0.0512167 -0.804724201 -0.3062158
## 17 0.688519325 0.0512167 -0.620812432 -0.3062158
## 18 2.600651394 0.0512167 0.133260538 -0.3062158
## 19 0.808286356 0.0512167 -0.145816030 -0.3062158
## 20 0.897698999 0.0512167 -0.165048516 -0.3062158
## 21 0.520885828 0.0512167 0.014739065 -0.3062158
## 22 0.358900720 0.0512167 0.132066939 -0.3062158
## 23 0.743971552 0.0512167 -0.203149844 -0.3062158
## 24 1.248394014 0.0512167 0.066328216 -0.3062158
## 25 1.965620013 0.0512167 0.892846132 -0.3062158
## 26 0.188379477 0.0512167 -0.322123583 -0.3062158
## 27 0.345671073 0.0512167 -0.109336875 -0.3062158
## 28 1.748305496 0.0512167 0.137184448 -0.3062158
## 29 2.149768982 0.0512167 -0.203375466 -0.3062158
## 30 1.482798609 0.0512167 0.027595757 -0.3062158
## 31 0.748238663 0.0512167 -0.412893437 -0.3062158
## 32 0.414726224 0.0512167 0.220863039 -0.3062158
## 33 0.884320408 0.0512167 0.186267372 -0.3062158
## 34 0.991847334 0.0512167 -0.305698321 -0.3062158
## 35 1.424475195 0.0512167 0.996155299 -0.3062158
## 36 0.692434064 0.0512167 0.380764376 -0.3062158
## 37 0.415880782 0.0512167 0.020582920 -0.3062158
## 38 1.997132852 0.0512167 -0.533110260 -0.3062158
## 39 1.552559658 0.0512167 -0.001585971 -0.3062158
## 40 -0.004445929 0.0512167 -0.273973402 -0.3062158
## 41 0.855801789 0.0512167 -0.556973529 -0.3062158
## 42 0.552146840 0.0512167 0.063083090 -0.3062158
## 43 1.724529148 0.0512167 0.589459357 -0.3062158
## 44 1.428777466 0.0512167 0.009742047 -0.3062158
## 45 1.515421337 0.0512167 0.295231596 -0.3062158
## 46 0.130097959 0.0512167 0.041666210 -0.3062158
## 47 1.449823713 0.0512167 0.173007919 -0.3062158
## 48 0.237858057 0.0512167 -0.495714138 -0.3062158
## 49 2.888177063 0.0512167 0.986179975 -0.3062158
## 50 0.940923249 0.0512167 -0.154056907 -0.3062158
## 51 1.564238596 0.0512167 -0.104171052 -0.3062158
## 52 1.234947547 0.0512167 -0.537352384 -0.3062158
## 53 1.900723600 0.0512167 0.497235728 -0.3062158
## 54 1.011181162 0.0512167 -0.205350004 -0.3062158
## 55 0.727001607 0.0512167 0.111538833 -0.3062158
## 56 1.135595306 0.0512167 0.891879577 -0.3062158
## 57 0.968751233 0.0512167 -0.627187757 -0.3062158
## 58 0.255045048 0.0512167 -0.856841832 -0.3062158
## 59 0.468957890 0.0512167 -0.014036959 -0.3062158
##
## attr(,"class")
## [1] "coef.mer"
# Treating time as quantitative:
mod2 <- glmer(Count ~ trt*Time + (Time | ID), offset=log(Weeks),
family=poisson, data=epi_long)
summary(mod2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Count ~ trt * Time + (Time | ID)
## Data: epi_long
## Offset: log(Weeks)
##
## AIC BIC logLik deviance df.resid
## 1924.2 1950.0 -955.1 1910.2 288
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3786 -0.7228 -0.1173 0.5846 6.6309
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 0.526863 0.72585
## Time 0.005029 0.07091 0.22
## Number of obs: 295, groups: ID, 59
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.10395 0.14260 7.741 9.83e-15
## trtProgabide 0.01750 0.19632 0.089 0.9290
## Time -0.01133 0.01681 -0.674 0.5004
## trtProgabide:Time -0.04669 0.02335 -2.000 0.0456
##
## Correlation of Fixed Effects:
## (Intr) trtPrg Time
## trtProgabid -0.724
## Time 0.065 -0.053
## trtPrgbd:Tm -0.054 0.074 -0.694
plot(allEffects(mod2, residuals = T), type = "link")

# How does the interpretation of coefficients change?
# Note larger AIC
##### Fitting Marginal Models #####
?gee
mod.gee <- gee(Count ~ trt*PostBase + offset(log(Weeks)),
id = ID, family = poisson(link = "log"),
corstr = "exchangeable", data = epi_long)
## (Intercept) trtProgabide PostBase1
## 1.34760922 0.02753449 0.11183602
## trtProgabide:PostBase1
## -0.10472579
# corstr="exchangeable" --> compound symmetry covariance structure.
# family=poisson --> Poisson variance function (not distribution)
summary(mod.gee)
##
## GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
## gee S-function, version 4.13 modified 98/01/27 (1998)
##
## Model:
## Link: Logarithm
## Variance to Mean Relation: Poisson
## Correlation Structure: Exchangeable
##
## Call:
## gee(formula = Count ~ trt * PostBase + offset(log(Weeks)), id = ID,
## data = epi_long, family = poisson(link = "log"), corstr = "exchangeable")
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -4.303571 -1.303571 2.016129 10.370392 147.044355
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E. Robust z
## (Intercept) 1.34760922 0.1510969 8.9188397 0.1573571 8.5640166
## trtProgabide 0.02753449 0.2071018 0.1329515 0.2217878 0.1241479
## PostBase1 0.11183602 0.1545145 0.7237900 0.1159304 0.9646821
## trtProgabide:PostBase1 -0.10472579 0.2197052 -0.4766650 0.2134448 -0.4906459
##
## Estimated Scale Parameter: 19.6797
## Number of Iterations: 1
##
## Working Correlation
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.0000000 0.7713861 0.7713861 0.7713861 0.7713861
## [2,] 0.7713861 1.0000000 0.7713861 0.7713861 0.7713861
## [3,] 0.7713861 0.7713861 1.0000000 0.7713861 0.7713861
## [4,] 0.7713861 0.7713861 0.7713861 1.0000000 0.7713861
## [5,] 0.7713861 0.7713861 0.7713861 0.7713861 1.0000000
mod.gee2 <- gee(Count ~ trt*PostBase + offset(log(Weeks)),
id = ID, family = poisson(link = "log"),
corstr = "AR-M", data = epi_long)
## (Intercept) trtProgabide PostBase1
## 1.34760922 0.02753449 0.11183602
## trtProgabide:PostBase1
## -0.10472579
summary(mod.gee2)
##
## GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
## gee S-function, version 4.13 modified 98/01/27 (1998)
##
## Model:
## Link: Logarithm
## Variance to Mean Relation: Poisson
## Correlation Structure: AR-M , M = 1
##
## Call:
## gee(formula = Count ~ trt * PostBase + offset(log(Weeks)), id = ID,
## data = epi_long, family = poisson(link = "log"), corstr = "AR-M")
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -4.327892 -1.327892 2.120474 10.440487 147.208867
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E.
## (Intercept) 1.31279985 0.1427491 9.1965551 0.1617122
## trtProgabide 0.01986517 0.1960117 0.1013468 0.2117125
## PostBase1 0.15228086 0.1682744 0.9049558 0.1114624
## trtProgabide:PostBase1 -0.12923296 0.2405021 -0.5373464 0.2597892
## Robust z
## (Intercept) 8.11812310
## trtProgabide 0.09383086
## PostBase1 1.36620870
## trtProgabide:PostBase1 -0.49745325
##
## Estimated Scale Parameter: 20.12528
## Number of Iterations: 3
##
## Working Correlation
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.0000000 0.8102249 0.6564644 0.5318838 0.4309455
## [2,] 0.8102249 1.0000000 0.8102249 0.6564644 0.5318838
## [3,] 0.6564644 0.8102249 1.0000000 0.8102249 0.6564644
## [4,] 0.5318838 0.6564644 0.8102249 1.0000000 0.8102249
## [5,] 0.4309455 0.5318838 0.6564644 0.8102249 1.0000000
# Note similar coefficient estimates and Wald tests
# GEE std. errs robust to covariance structure assumption